/*******************************************************************************

  Paying Outsourced Labor: Direct Evidence from Linked Temp Agency-Worker-Client Data

  By Andres Drenik, Simon Jäger, Pascuel Plotkin and Benjamin Schoefer
  January 7th, 2021

	DESCRIPTION: Creates Appendix Table A.1

*******************************************************************************/




/********************************************************************************
***** Preliminaries
********************************************************************************/
set more off
cap log close
local curr_date = c(current_date)
log using "${logs}/TabA1`curr_date'", replace


/****************************************************************************************
* Looping over the 2010 - 2017 filtered datasets, to generate one unique dataset
****************************************************************************************/


forvalues y = 2010/2017{

	use "${Data_with_filter}/Argentina_Clean_`y'.dta", clear

/****************************************************************************************
* Drop useless observations for our analysis
	* Drop duplicated observations for temp workers (modalidad = 102)
	* Only keep private sector workers
	* Only keep workers that have 1 user firm per month
****************************************************************************************/

	drop if modalidad == 102
	drop if public_worker == 1
	drop if multiple_user_firm == 1
	drop if base_registro_match == 2

/****************************************************************************************
* Drop useless variables
****************************************************************************************/

	capture drop base_registro_match
	capture drop multiple_user_firm
	capture drop cuit_user_firm2
	capture drop cuit_user_firm3
	capture drop cuit_user_firm4
	capture drop cuit_user_firm5

*************

	* Rename variable that flags the user firm
	rename cuit_user_firm1 cuit_user_firm

	* Generate 2 digit industry code
	gen industry_code_2digit=int(ciiu_4/100)

	*Drop observations that don't register a wage
	drop if remuner_total == .

    *Flag Worker per date
	bys cuil_trab date (remuner_total) : gen worker = (_n == _N)
	gen total_obs = 1

    *Flag Part time Jobs
	gen part_time = (modalidad == 1 | modalidad == 21 | modalidad == 304 | modalidad = 305 | modalidad == 306)
	bys cuil_trab date : gegen worker_part_time_aux = sum(part_time)
    gen worker_part_time = (worker_part_time_aux > 0 & worker == 1)
    drop worker_part_time_aux

    *flag Mutliple job holders
	duplicates tag cuil_trab date, gen(duplicate)
	gen worker_multiple_job = (duplicate > 0 & worker==1)

	*Flag Gender
	gen female = (gender == 2 & worker == 1)
	gen male = (gender == 1 & worker == 1)
    qui sum gender if gender == 2 & worker ==1
	local female = r(sum_w)

    *Mean and Median age for workers
	qui sum age if worker ==1, detail
	local mean_age = r(mean)
	local median_age = r(p50)
	gen mean_age = `mean_age'
	gen median_age =`median_age'

    *Share per Gender
	qui sum worker if worker ==1
	local total = r(sum_w)
	local perc_female = `female'/`total'
	local perc_male = 1 - `perc_female'
	gen perc_female = `perc_female'
	gen perc_male = `perc_male'

    * Meidan and Mean Wages (in Argentine Pesos)
	qui sum remuner_total if worker ==1, detail
	local mean_remuner_total = r(mean)
	local median_remuner_total = r(p50)
	gen mean_remuner_total = `mean_remuner_total'
	gen median_remuner_total =`median_remuner_total'

	gcollapse (mean) year perc_* mean_* median_* (sum) worker total_obs worker_multiple_job worker_part_time male female, by(date industry_code_2digit) fast
	save "${intermediate_data_emp}/kk_table1_`y'.dta", replace
}


*************************************
*Append and variables by industry letter
*************************************

	use "${intermediate_data_emp}/kk_table1_2010.dta", clear
	forvalues y = 2011/2017{
		append using "${intermediate_data_emp}/kk_table1_`y'.dta"
	}

	save "${intermediate_data_emp}/kk_table1.dta", replace

	*labels
	gen industry_code_letter = 1 if inrange(industry_code_2digit, 1, 5)
	replace industry_code_letter = 2 if inrange(industry_code_2digit, 10, 14)
	replace industry_code_letter = 3 if inrange(industry_code_2digit, 40, 41)
	replace industry_code_letter = 4 if industry_code_2digit == 45
	replace industry_code_letter = 5 if inrange(industry_code_2digit, 15, 37)
	replace industry_code_letter = 6 if industry_code_2digit == 51
	replace industry_code_letter = 7 if industry_code_2digit == 50 | industry_code_2digit == 52
	replace industry_code_letter = 8 if inrange(industry_code_2digit, 60, 64)

	replace industry_code_letter = 10 if inrange(industry_code_2digit, 65, 67)
	replace industry_code_letter = 11 if inrange(industry_code_2digit, 70, 74)
	replace industry_code_letter = 12 if industry_code_2digit == 80 | industry_code_2digit == 85
	replace industry_code_letter = 13 if industry_code_2digit == 55
	replace industry_code_letter = 14 if inrange(industry_code_2digit, 90, 93) | industry_code_2digit==99 | industry_code_2digit==.

	replace industry_code_letter = 16 if industry_code_2digit == 75
	replace industry_code_letter = 17 if inrange(industry_code_2di	git, 95, 97)
	replace industry_code_letter = 18 if industry_code_2digit == 0 | industry_code_2digit == .

	*RETAIL TRADE INCLUDES CAR TRADES
	label define industry 1 "Agriculture, Foresty, Fishing and Hunting" 2 "Mining" 3 "Utilities" 4 "Construction" 5 "Manufacturing" 6 "Wholsale Trade" 7 "Retail Trade" 8 "Transportation Warehousing and comunication" 9 "Information" 10 "Financial activities" 11 "Professional and Business Services" 12 "Education and Health Services" 13 "Leisure and Hospitality" 14 "Other Services (Excluding Public Administration)" 15 "Public Administration" 16 "Temporary work agents" 17 "Activities of private households as employers and undifferentiated production activities of private households" 18 "Other (public administration?)", modify
	label values industry_code_letter industry

	*Workers by industry
	egen workers_industry_letter = sum(worker), by(industry_code_letter)
	egen workers_industry_letter_year = sum(worker), by(industry_code_letter year)
	egen total_workers_year = sum(worker), by(year)
    egen avg_worker_year = total_workers_year/12
	gen perc_workers_industry_year = workers_industry_letter_year / total_workers_year

	*Merge with inflation dataset
	merge m:1 year using "${input}/ArgentinaPriceLevel.dta"
	drop if _merge==2
	drop _merge

	sum index if year == 2017
	local index_2017 = r(mean)

	*Inflate wages to 2017
	gen coefficient_for_real_wage = (`index_2017'/index)
	gen mean_real_wage_2017 = coefficient_for_real_wage * mean_remuner_total
	gen median_real_wage_2017 = coefficient_for_real_wage * median_remuner_total

	*Average exchange rate for the last two years in Argentina (2017-2016)
	local average_exchange_rate = 21.39

	*Wages in dollars
	gen mean_wage_in_dollars = mean_real_wage_2017/`average_exchange_rate'
	gen median_wage_in_dollars = median_real_wage_2017/`average_exchange_rate'

	egen multiple_job_year = sum(worker_multiple_job), by(year)
	egen part_time_year = sum(worker_part_time), by(year)
	egen total_worker_year = sum(worker), by(year)
	gen multiple_job_percent = multiple_job_year / total_worker_year
	gen part_time_percent = part_time_year / total_worker_year

	egen total_obs_year = sum(total_obs), by(year)

	save "${intermediate_data_emp}/kk_table1.dta", replace

	*********************
	******* TABLE *******
	*********************
	file open table1_kk using "${Results}\table1_kk.tex", write replace
	file write table1_kk "\begin{table}[]" _n
    file write table1_kk "\begin{tabular}{lcccccc}" _n "\hline" _n
    file write table1_kk "\multicolumn{1}{c}{} & \multicolumn{3}{c}{SIPA Dataset} & \multicolumn{3}{c}{US Survey (Katz \& Krueger)} \\ \hline" _n
    file write table1_kk "\multicolumn{1}{c}{} &  &  &  & Unweight. & Weight. & Alt. Weight \\ \cline{2-7}" _n
	file write table1_kk "\multicolumn{1}{c}{} & \multicolumn{3}{c}{Years} &  &  &  \\ \cline{2-7}" _n
	file write table1_kk "\multicolumn{1}{c}{(Average for all registered workers during each year)} & 2011 & 2014 & 2017 & \multicolumn{3}{c}{2015} \\ \hline" _n

	local varlist mean_age median_age mean_wage_in_dollars median_wage_in_dollars
	foreach x of local varlist {
	foreach i of numlist 2011 2014 2017 {
		qui sum `x' if year == `i'
		local `x'_`i' = round(r(mean), .1)
		local `x'_`i' = (substr(string(``x'_`i'',"%6.0f"),1,6))
	}
	}

	foreach i of numlist 2011 2014 2017 {
		qui sum perc_female if year == `i'
		local perc_female_`i' = round(r(mean)*100, .1)
		local perc_female_`i' = (substr(string(`perc_female_`i'',"%6.1f"),1,5))
	}

	foreach i of numlist 2011 2014 2017 {
		qui sum part_time_percent if year == `i'
		local part_time_percent_`i' = round(r(mean)*100, .1)
		local part_time_percent_`i' = (substr(string(`part_time_percent_`i'',"%6.1f"),1,5))
	}

	foreach i of numlist 2011 2014 2017 {
		qui sum multiple_job_percent if year == `i'
		local multiple_job_percent_`i' = round(r(mean)*100, .1)
		local multiple_job_percent_`i' = (substr(string(`multiple_job_percent_`i'',"%6.1f"),1,5))
	}

	foreach i of numlist 2011 2014 2017 {
		qui sum avg_worker_year if year == `i'
		local avg_worker_year_`i' = r(mean)
		local avg_worker_year_`i' = (substr(string(`avg_worker_year_`i'',"%15.0f"),1,15))
		dis `avg_worker_year_`i''
	}

    file write table1_kk "Median Age (years) & `median_age_2011' & `median_age_2014' & \multicolumn{1}{c|}{`median_age_2017'} & 50 & 41 & 41 \\" _n
	file write table1_kk "Mean Age (years) & `mean_age_2011' & `mean_age_2014' & \multicolumn{1}{c|}{`mean_age_2017'} & 48.3 & 42.6 & 42.5 \\" _n
	file write table1_kk "Median Wage (dollars) & `median_wage_in_dollars_2011' & `median_wage_in_dollars_2014' & \multicolumn{1}{c|}{`median_wage_in_dollars_2017'} &  &  &  \\" _n
    file write table1_kk "Mean Wage (dollars) & `mean_wage_in_dollars_2011' & `mean_wage_in_dollars_2014' & \multicolumn{1}{c|}{`mean_wage_in_dollars_2017'} &  &  &  \\" _n
    file write table1_kk "Female (percent) & `perc_female_2011' & `perc_female_2014' & \multicolumn{1}{c|}{`perc_female_2017'} & 55.5 & 47.1 & 47.1 \\" _n
	file write table1_kk "Multiple Jobholder & `multiple_job_percent_2011' & `multiple_job_percent_2014' & \multicolumn{1}{c|}{`multiple_job_percent_2017'} & 14.3 & 13.2 & 13.1 \\" _n
	file write table1_kk "In Labor Force (Percent of Population) & 46.3 & 44.9 & \multicolumn{1}{c|}{45.9} & 62.8 & 67.5 & 67.5 \\" _n
	file write table1_kk "Part-Time Employment & `part_time_percent_2011' & `part_time_percent_2014' & \multicolumn{1}{c|}{`part_time_percent_2017'} & 26.2 & 24.2 & 23.5 \\" _n
	file write table1_kk "\textbf{Industry (percent):} &  &  & \multicolumn{1}{c|}{} &  &  &  \\" _n

	forvalues x = 1/18 {
	foreach i of numlist 2011 2014 2017 {
		qui sum perc_workers_industry_year if industry_code_letter == `x' & year == `i'
		local `x'_`i' = round(r(mean)*100, .1)
		local `x'_`i' = (substr(string(``x'_`i'',"%6.1f"),1,5))
	}
	}

	file write table1_kk "Agriculture, Forestry, Fishing and Hunting & `1_2011' & `1_2014' & \multicolumn{1}{c|}{`1_2017'} & 1.0 & 1.6 & 1.5 \\" _n
	file write table1_kk "Mining & `2_2011' & `2_2014' & \multicolumn{1}{c|}{`2_2017'} & 0.6 & 0.5 & 0.5 \\" _n
	file write table1_kk "Utilities & `3_2011' & `3_2014' & \multicolumn{1}{c|}{`3_2017'} & 0.5 & 0.9 & 0.9 \\" _n
	file write table1_kk "Construction & `4_2011' & `4_2014' & \multicolumn{1}{c|}{`4_2017'} & 3.1 & 4.1 & 3.9 \\" _n
	file write table1_kk "Manufacturing & `5_2011' & `5_2014' & \multicolumn{1}{c|}{`5_2017'} & 7.3 & 8.6 & 8.8 \\" _n
	file write table1_kk "Wholesale Trade & `6_2011' & `6_2014' & \multicolumn{1}{c|}{`6_2017'} & 2.6 & 2.2 & 2.2 \\" _n
	file write table1_kk "Retail Trade & `7_2011' & `7_2014' & \multicolumn{1}{c|}{`7_2017'} & 8.7 & 9.6 & 9.6 \\" _n
	file write table1_kk "Transportation Warehousing and communication & `8_2011' & `8_2014' & \multicolumn{1}{c|}{`8_2017'} & 6.4 & 9 & 9.2 \\" _n
	file write table1_kk "Financial activities & `10_2011' & `10_2014' & \multicolumn{1}{c|}{`10_2017'} & 9.2 & 9.2 & 9.2 \\" _n
	file write table1_kk "Professional and Business Services & `11_2011' & `11_2014' & \multicolumn{1}{c|}{`11_2017'} & 14.5 & 13.4 & 13.2 \\" _n
	file write table1_kk "Education and Health Services & `12_2011' & `12_2014' & \multicolumn{1}{c|}{`12_2017'} & 26.0 & 22.4 & 22.5 \\" _n
	file write table1_kk "Leisure and Hospitality & `13_2011' & `13_2014' & \multicolumn{1}{c|}{`13_2017'} & 5.4 & 6.0 & 6.0 \\" _n
	file write table1_kk "Other Services (Excluding Public Administration) & `14_2011' & `14_2014' & \multicolumn{1}{c|}{`14_2017'} & 5.2 & 4.8 & 4.7 \\" _n
	file write table1_kk "Temporary work agents & `16_2011' & `16_2014' & \multicolumn{1}{c|}{`16_2017'} &  & 1.6 & 1.6 \\" _n
	file write table1_kk " &  &  & \multicolumn{1}{c|}{} &  &  &  \\" _n
	file write table1_kk "\textbf{Avg. Workers} & 4,225,916 & 4,261,083 & \multicolumn{1}{c|}{4,296,090} &  &  &  \\ \hline" _n
	file write table1_kk "\end{tabular}" _n
	file write table1_kk "\end{table}"_n
	file close table1_kk

log close
